#!/bin/bash -x

#######################################
# Switch every CPU that exposes a cpufreq governor file to "performance".
#
# Best-effort: a governor file that cannot be written produces a warning
# and the remaining CPUs are still processed.
#
# Globals:
#   LOG - read only; when set and non-empty, messages are appended to this
#         file in addition to being printed.
# Arguments:
#   $1 - optional directory holding the cpuN entries
#        (default: /sys/devices/system/cpu).
# Outputs:
#   Progress line on stdout; warnings on stderr.
# Returns:
#   0 always.
#######################################
function set_cpu_performance()
{
        local cpu_root=${1:-/sys/devices/system/cpu}
        local governor
        # "tee -a" with no file operand simply copies stdin to stdout, which
        # is what happens when LOG is unset.
        local -a log_cmd=(tee -a)

        if [[ -n "${LOG:-}" ]]; then
                log_cmd+=(-- "${LOG}")
        fi

        echo "set cpu peformance mode" | "${log_cmd[@]}"

        # Glob the governor files instead of counting /proc/cpuinfo entries:
        # CPU numbering is not guaranteed to be contiguous, and the pattern
        # cpu[0-9]* skips sibling directories such as cpufreq/ and cpuidle/.
        for governor in "${cpu_root}"/cpu[0-9]*/cpufreq/scaling_governor; do
                # An unmatched glob stays literal; skip it.
                [[ -f "${governor}" ]] || continue

                if ! echo performance > "${governor}"; then
                        echo "warning: could not write ${governor}" \
                                | "${log_cmd[@]}" >&2
                fi
        done

        return 0
}

# ---------------------------------------------------------------------------
# Launch this node's share of a multi-node llamafactory-cli training run.
#
# Usage: <script> NNODES NODE_RANK MASTER_ADDR
#   NNODES       passed to llamafactory-cli as the NNODES environment variable
#   NODE_RANK    passed as NODE_RANK; also used in the log file name
#   MASTER_ADDR  passed as MASTER_ADDR
#
# Combined stdout/stderr of the training command is shown on the terminal and
# written to logs_<MMDDhhmmss>/train_rank<NODE_RANK>.log.
# ---------------------------------------------------------------------------

# Refuse to start (and leave the CPU governor untouched) when an argument is
# missing; otherwise empty values would be exported to the launcher.
if (( $# < 3 )); then
        printf 'Usage: %s NNODES NODE_RANK MASTER_ADDR\n' "${0##*/}" >&2
        exit 2
fi

# Best-effort host tuning before training starts.
set_cpu_performance

# NOTE(review): GPUS_PER_NODE is not referenced or exported anywhere in the
# visible script — confirm whether it was meant to reach the launcher.
GPUS_PER_NODE=16
NNODES=$1
NODE_RANK=$2
MASTER_ADDR=$3
MASTER_PORT=12345

# One log directory per invocation, named after the start time.
LOG_DIR="logs_$(date +%m%d%H%M%S)"
if ! mkdir -p -- "${LOG_DIR}"; then
        printf 'cannot create log directory %s\n' "${LOG_DIR}" >&2
        exit 1
fi

# Environment inherited by the training processes.
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_ALGO=Ring
export OMP_NUM_THREADS=4

# Without pipefail the pipeline's exit status is tee's, so a failed training
# run would make this script exit 0.
set -o pipefail

FORCE_TORCHRUN=1 \
NNODES="${NNODES}" \
NODE_RANK="${NODE_RANK}" \
MASTER_ADDR="${MASTER_ADDR}" \
MASTER_PORT="${MASTER_PORT}" \
llamafactory-cli train configs/train_full/qwen2_72b_full_sft_mg.yaml 2>&1 \
        | tee "${LOG_DIR}/train_rank${NODE_RANK}.log"
